{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0aa41865-3e1d-4651-9f7e-ba531d185a0a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "from scipy.stats import ttest_ind"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fcb00e3f-6193-415e-a5b3-755f428f189b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the analysis dataset from the Excel workbook into `df`.\n",
    "# NOTE(review): the path has no drive colon, so it is resolved relative to the\n",
    "# working directory -- confirm this is the intended location.\n",
    "data_path = r\"E\\df.xlsx\"\n",
    "df = pd.read_excel(data_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4a744a9e-4e99-4fb9-9e64-c3863c243d39",
   "metadata": {},
   "source": [
    "# Analysis 1"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1edea229-1013-4147-a730-8e16760df70e",
   "metadata": {},
   "source": [
    "## fig 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0007a001-d794-4f0b-af2f-a296698062cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure 2: monthly mean valence and arousal per country, with historical events marked.\n",
    "df['date'] = pd.to_datetime(df['date'])\n",
    "\n",
    "# Monthly average: bucket every row by calendar month (stamped with the month's\n",
    "# first day) so the curves really are monthly even if `date` is finer-grained.\n",
    "# Dates that already sit on the first of a month are left where they were.\n",
    "month_start = df['date'].dt.to_period('M').dt.to_timestamp()\n",
    "monthly_avg = (\n",
    "    df.groupby([month_start, 'nationality'])[['valence', 'arousal']]\n",
    "    .mean()\n",
    "    .reset_index()\n",
    ")\n",
    "\n",
    "# Plot styling\n",
    "sns.set(style=\"whitegrid\")\n",
    "color_map = {\"Korea\": \"#003f5c\", \"China\": \"#8b0000\"}\n",
    "line_style_map = {\"Korea\": \":\", \"China\": \"-\"}\n",
    "\n",
    "# One entry per panel: (column, y-axis label, title, alpha of the China line).\n",
    "# The solid China line is faded so the dotted Korea line stays readable.\n",
    "panels = [\n",
    "    ('valence', \"Valence\", \"(a) Monthly Valence Trend (Korea vs China)\", 0.4),\n",
    "    ('arousal', \"Arousal\", \"(b) Monthly Arousal Trend (Korea vs China)\", 0.6),\n",
    "]\n",
    "\n",
    "# Create subplots\n",
    "fig, axes = plt.subplots(2, 1, figsize=(14, 13), sharex=True)\n",
    "\n",
    "for ax, (column, ylabel, title, china_alpha) in zip(axes, panels):\n",
    "    for nationality in ['Korea', 'China']:\n",
    "        subset = monthly_avg[monthly_avg['nationality'] == nationality]\n",
    "        ax.plot(subset['date'], subset[column],\n",
    "                label=nationality,\n",
    "                color=color_map[nationality],\n",
    "                linestyle=line_style_map[nationality],\n",
    "                alpha=china_alpha if nationality == 'China' else 1.0)\n",
    "    ax.set_title(title, fontsize=16, pad=10)\n",
    "    ax.set_ylabel(ylabel, fontsize=16)\n",
    "    ax.tick_params(axis='x', labelsize=18, rotation=45)\n",
    "    ax.tick_params(axis='y', labelsize=14)\n",
    "    ax.legend(fontsize=15, loc='upper left')\n",
    "\n",
    "# sharex=True hides the x tick labels of the upper panel; switch them back on.\n",
    "axes[0].tick_params(labelbottom=True)\n",
    "\n",
    "# X-axis ticks: only years\n",
    "xticks = pd.date_range(start=monthly_avg['date'].min(), end=monthly_avg['date'].max(), freq='YS')\n",
    "xticklabels = [str(d.year) for d in xticks]\n",
    "for ax in axes:\n",
    "    ax.set_xticks(xticks)\n",
    "    ax.set_xticklabels(xticklabels)\n",
    "\n",
    "# Annotate historical events: one dashed line per distinct event, at the date of\n",
    "# its first row in `df`, on both panels.\n",
    "event_df = df[['date', 'historical_event']].dropna().drop_duplicates(subset='historical_event')\n",
    "# Cycle through a few heights so neighbouring labels do not sit on one baseline.\n",
    "y_offsets = [0.02, 0.025, 0.03, 0.035, 0.04, 0.045, 0.05]\n",
    "for i, (_, row) in enumerate(event_df.iterrows()):\n",
    "    offset = y_offsets[i % len(y_offsets)]\n",
    "    for ax in axes:\n",
    "        ax.axvline(row['date'], color='gray', linestyle='--', linewidth=1)\n",
    "        y_top = ax.get_ylim()[1]\n",
    "        ax.annotate(\n",
    "            row['historical_event'],\n",
    "            xy=(row['date'], y_top),\n",
    "            xytext=(row['date'], y_top + offset),\n",
    "            textcoords='data',\n",
    "            arrowprops=dict(arrowstyle='->', color='gray'),\n",
    "            fontsize=15,\n",
    "            ha='center',\n",
    "            va='bottom',\n",
    "            color='gray'\n",
    "        )\n",
    "\n",
    "# Final layout: widen the gap between panels to leave room for the event labels.\n",
    "plt.tight_layout()\n",
    "plt.subplots_adjust(hspace=0.8)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a418733-a818-4efd-9860-c98205987903",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "ab561618-c43e-47a9-8a9c-b2ecd7d35b95",
   "metadata": {},
   "source": [
    "## fig 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ad482888-7c98-43f3-86f1-4b8268f6023b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure 3: valence/arousal distributions per five-year period and country.\n",
    "df['date'] = pd.to_datetime(df['date'])\n",
    "df['year'] = df['date'].dt.year\n",
    "\n",
    "# Five-year periods in chronological order; index k covers 2000+5k .. 2004+5k.\n",
    "HALF_DECADES = [\"early 2000s\", \"late 2000s\", \"early 2010s\", \"late 2010s\", \"early 2020s\"]\n",
    "\n",
    "\n",
    "def label_half_decade(year):\n",
    "    \"\"\"Return the five-year period label for `year`, or None outside 2000-2024.\n",
    "\n",
    "    A missing year (NaN) fails the range check and therefore also yields None.\n",
    "    \"\"\"\n",
    "    if not (2000 <= year < 2025):\n",
    "        return None\n",
    "    return HALF_DECADES[int((year - 2000) // 5)]\n",
    "\n",
    "\n",
    "def hatch_hue_boxes(ax, hue_label, hatch='//', edgecolor='black'):\n",
    "    \"\"\"Hatch every box drawn for `hue_label` on `ax`, plus its legend swatch.\n",
    "\n",
    "    Boxes are recognised by face colour (the colour of the legend handle that\n",
    "    carries `hue_label`) rather than by position in `ax.artists`: current\n",
    "    seaborn/matplotlib keep box patches in `ax.patches`, and their order does\n",
    "    not have to alternate between hue levels. Returns the number of artists\n",
    "    that were hatched (0 if the label has no legend handle).\n",
    "    \"\"\"\n",
    "    handles, labels = ax.get_legend_handles_labels()\n",
    "    targets = [h.get_facecolor() for h, lab in zip(handles, labels) if lab == hue_label]\n",
    "    if not targets:\n",
    "        return 0\n",
    "    hatched = 0\n",
    "    # ax.artists is included for older seaborn/matplotlib, which stored boxes there.\n",
    "    for artist in list(ax.patches) + list(ax.artists):\n",
    "        if hasattr(artist, 'get_facecolor') and np.allclose(artist.get_facecolor(), targets[0]):\n",
    "            artist.set_hatch(hatch)\n",
    "            artist.set_edgecolor(edgecolor)\n",
    "            hatched += 1\n",
    "    return hatched\n",
    "\n",
    "\n",
    "df['half_decade'] = df['year'].apply(label_half_decade)\n",
    "# NOTE: this reassigns the notebook-level `df`, so the cells below also only see\n",
    "# rows dated 2000-2024.\n",
    "df = df.dropna(subset=['half_decade'])  # Drop anything not matching a half-decade\n",
    "\n",
    "# Set plot style and colors\n",
    "sns.set(style=\"whitegrid\")\n",
    "color_map = {\"Korea\": \"#003f5c\", \"China\": \"#8b0000\"}\n",
    "\n",
    "# Fix both orders explicitly so the figure does not depend on the row order of `df`.\n",
    "hue_order = list(color_map)\n",
    "present_periods = set(df['half_decade'])\n",
    "period_order = [p for p in HALF_DECADES if p in present_periods]\n",
    "\n",
    "# One entry per panel: (column, y-axis label, title)\n",
    "panels = [\n",
    "    ('valence', \"Valence\", \"(a) Valence Distribution by Period and Country\"),\n",
    "    ('arousal', \"Arousal\", \"(b) Arousal Distribution by Period and Country\"),\n",
    "]\n",
    "\n",
    "# Create 2-row subplot\n",
    "fig, axes = plt.subplots(2, 1, figsize=(12, 8), sharex=True)\n",
    "\n",
    "for ax, (column, ylabel, title) in zip(axes, panels):\n",
    "    sns.boxplot(data=df, x='half_decade', y=column, hue='nationality',\n",
    "                order=period_order, hue_order=hue_order, palette=color_map, ax=ax)\n",
    "    ax.set_title(title, fontsize=16, pad=10)\n",
    "    ax.set_xlabel(\"\")\n",
    "    ax.set_ylabel(ylabel, fontsize=14)\n",
    "    ax.tick_params(axis='x', labelsize=16)\n",
    "    ax.tick_params(axis='y', labelsize=14)\n",
    "    # Hatch the first hue level (Korea) so the two countries stay distinguishable\n",
    "    # without colour; build the legend afterwards so its swatch shows the hatch too.\n",
    "    hatch_hue_boxes(ax, hue_order[0])\n",
    "    ax.legend(title=\"Country\", fontsize=12, loc='upper left')\n",
    "\n",
    "# Final layout\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8f0620d1-11f1-4a6c-9b59-0b905a7bfefa",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "7d962c0d-1641-4474-a3ed-159a9d7a632b",
   "metadata": {},
   "source": [
    "# Analysis 2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d4d6373a-0c71-452d-a150-183f06234b11",
   "metadata": {},
   "source": [
    "## fig 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d3651b78-bff6-4c12-a332-ee0a68cc7717",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure 4: monthly Korea-minus-China gap in mean valence and mean arousal.\n",
    "\n",
    "# Bucket rows by calendar month, labelled with the month's last day (the labels\n",
    "# pd.Grouper(freq=\"M\") produces) without relying on the offset alias \"M\",\n",
    "# which newer pandas releases deprecate in favour of \"ME\".\n",
    "month_end = pd.to_datetime(df[\"date\"]).dt.normalize() + pd.offsets.MonthEnd(0)\n",
    "monthly_df = (\n",
    "    df.groupby([month_end, \"nationality\"])[[\"valence\", \"arousal\"]]\n",
    "    .mean()\n",
    "    .reset_index()\n",
    ")\n",
    "\n",
    "pivot_df = monthly_df.pivot(index=\"date\", columns=\"nationality\", values=[\"valence\", \"arousal\"])\n",
    "# Flatten (metric, country) into \"metric_country\" from the actual labels instead of\n",
    "# assuming a column order, then keep only the four series the figure needs.\n",
    "pivot_df.columns = [f\"{metric}_{country}\" for metric, country in pivot_df.columns]\n",
    "pivot_df = pivot_df[[\"valence_China\", \"valence_Korea\", \"arousal_China\", \"arousal_Korea\"]]\n",
    "# A difference is only defined for months in which both countries have data.\n",
    "pivot_df = pivot_df.dropna()\n",
    "\n",
    "pivot_df[\"valence_diff\"] = pivot_df[\"valence_Korea\"] - pivot_df[\"valence_China\"]\n",
    "pivot_df[\"arousal_diff\"] = pivot_df[\"arousal_Korea\"] - pivot_df[\"arousal_China\"]\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(14, 6))\n",
    "\n",
    "ax.plot(pivot_df.index, pivot_df[\"valence_diff\"], label=\"Valence (Korea - China)\", linewidth=2, linestyle=':')\n",
    "ax.plot(pivot_df.index, pivot_df[\"arousal_diff\"], label=\"Arousal (Korea - China)\", linewidth=2)\n",
    "\n",
    "# Zero reference: above the line Korea scores higher, below it China does.\n",
    "ax.axhline(y=0, color='red', linestyle='--', linewidth=0.5)\n",
    "\n",
    "ax.set_title(\"Monthly Difference in Valence and Arousal: Korea - China\", fontsize=18, pad=10)\n",
    "ax.set_xlabel(\"\", fontsize=14)\n",
    "ax.set_ylabel(\"Difference Score\", fontsize=16)\n",
    "\n",
    "# One tick per year, placed at each year start inside the plotted range.\n",
    "years = pd.date_range(start=pivot_df.index.min(), end=pivot_df.index.max(), freq='YS')\n",
    "ax.set_xticks(years)\n",
    "ax.set_xticklabels([str(y.year) for y in years], fontsize=18, rotation=45)\n",
    "\n",
    "ax.tick_params(axis='y', labelsize=14)\n",
    "ax.grid(True, linestyle='--', alpha=0.6)\n",
    "ax.legend(fontsize=14)\n",
    "\n",
    "fig.tight_layout()\n",
    "fig.savefig(\"difference.png\", dpi=600)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a43c709d-ef47-42e8-8508-7d126fb93421",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "4e0ca714-cca3-472d-9fc3-cafac0c01d7c",
   "metadata": {},
   "source": [
    "## table 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d69eeb9f-9857-4d30-9a9c-e8955d96ac6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import statsmodels.formula.api as smf\n",
    "import statsmodels.api as sm"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cd8ee69b-2f60-4c0e-a466-b5cd4ed1be15",
   "metadata": {},
   "source": [
    "### valence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f526693-8886-4eda-a417-bca2ae41a9cd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Table 1 (valence): linear time trend with a China level shift and a\n",
    "# China-specific slope, estimated by OLS with HAC (Newey-West) standard errors.\n",
    "\n",
    "# 1. Aggregate to monthly means by country. Months are labelled with their last\n",
    "#    day (the labels pd.Grouper(freq=\"M\") produces) without relying on the offset\n",
    "#    alias \"M\", which newer pandas releases deprecate in favour of \"ME\".\n",
    "month_end = pd.to_datetime(df[\"date\"]).dt.normalize() + pd.offsets.MonthEnd(0)\n",
    "monthly_df = (\n",
    "    df.groupby([month_end, \"nationality\"])[[\"valence\"]]\n",
    "    .mean()\n",
    "    .reset_index()\n",
    ")\n",
    "\n",
    "# 2. Create time and interaction variables.\n",
    "#    `time` counts whole calendar months since the first month in the data.\n",
    "#    (Dividing elapsed days by 30 gives two months the same value, skips others\n",
    "#    and drifts away from the calendar over the years.)\n",
    "first_month = monthly_df[\"date\"].min()\n",
    "monthly_df[\"time\"] = (\n",
    "    (monthly_df[\"date\"].dt.year - first_month.year) * 12\n",
    "    + (monthly_df[\"date\"].dt.month - first_month.month)\n",
    ")\n",
    "monthly_df[\"China\"] = (monthly_df[\"nationality\"] == \"China\").astype(int)\n",
    "monthly_df[\"China_time\"] = monthly_df[\"China\"] * monthly_df[\"time\"]\n",
    "\n",
    "# 3. Prepare X and y for regression\n",
    "X = monthly_df[[\"China\", \"time\", \"China_time\"]]\n",
    "X = sm.add_constant(X)\n",
    "y = monthly_df[\"valence\"]\n",
    "\n",
    "# 4. Fit OLS model with Newey-West (HAC) standard errors, maxlags=12.\n",
    "#    NOTE(review): the frame stacks both countries (sorted by date, then\n",
    "#    nationality), so the 12 lags count rows rather than calendar months --\n",
    "#    confirm this is the intended window.\n",
    "model = sm.OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 12})\n",
    "\n",
    "# 5. Print summary\n",
    "print(model.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c6a6a74-5db0-4a46-88c3-937f84d63ebb",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "e4d9a103-247e-4b79-b26b-d7ed429fc215",
   "metadata": {},
   "source": [
    "### arousal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b53893e-fcb6-4fa6-9cca-4cde371d9c24",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Table 1 (arousal): linear time trend with a China level shift and a\n",
    "# China-specific slope, estimated by OLS with HAC (Newey-West) standard errors.\n",
    "\n",
    "# 1. Aggregate to monthly means by country. Months are labelled with their last\n",
    "#    day (the labels pd.Grouper(freq=\"M\") produces) without relying on the offset\n",
    "#    alias \"M\", which newer pandas releases deprecate in favour of \"ME\".\n",
    "month_end = pd.to_datetime(df[\"date\"]).dt.normalize() + pd.offsets.MonthEnd(0)\n",
    "monthly_df = (\n",
    "    df.groupby([month_end, \"nationality\"])[[\"arousal\"]]\n",
    "    .mean()\n",
    "    .reset_index()\n",
    ")\n",
    "\n",
    "# 2. Create time and interaction variables.\n",
    "#    `time` counts whole calendar months since the first month in the data.\n",
    "#    (Dividing elapsed days by 30 gives two months the same value, skips others\n",
    "#    and drifts away from the calendar over the years.)\n",
    "first_month = monthly_df[\"date\"].min()\n",
    "monthly_df[\"time\"] = (\n",
    "    (monthly_df[\"date\"].dt.year - first_month.year) * 12\n",
    "    + (monthly_df[\"date\"].dt.month - first_month.month)\n",
    ")\n",
    "monthly_df[\"China\"] = (monthly_df[\"nationality\"] == \"China\").astype(int)\n",
    "monthly_df[\"China_time\"] = monthly_df[\"China\"] * monthly_df[\"time\"]\n",
    "\n",
    "# 3. Prepare X and y for regression\n",
    "X = monthly_df[[\"China\", \"time\", \"China_time\"]]\n",
    "X = sm.add_constant(X)\n",
    "y = monthly_df[\"arousal\"]\n",
    "\n",
    "# 4. Fit OLS model with Newey-West (HAC) standard errors, maxlags=12.\n",
    "#    NOTE(review): the frame stacks both countries (sorted by date, then\n",
    "#    nationality), so the 12 lags count rows rather than calendar months --\n",
    "#    confirm this is the intended window.\n",
    "model = sm.OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 12})\n",
    "\n",
    "# 5. Print summary\n",
    "print(model.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e1cc9c25-7dec-4cc6-a5c2-2b16aac8772f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
